Objective

Perform weighted gene correlation network analysis as originally described by Horvath et al. on delta (postvax/baseline) transcriptomic data from the KSPZV1 clinical trial.

References:

https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/ https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/JMiller/Tutorial%20document.pdf

Reviewer requested that we provide scatter plots for the correlations between modules and phenotypes. Review of the original code revealed that using Pearson’s or Spearman’s correlation to assess the relationship between modules and binary traits was not appropriate. Thus, for binary traits, we performed limma (empirical Bayes moderated t test) and presented differences as beeswarm plots with mean and 95%CI. For continuous variables, we performed Spearman’s correlation.

Load packages

library(edgeR)
library(readxl)
library(EDASeq)
library(Biobase)
library(WGCNA)
library(ape)
library(Cairo)
library(CorLevelPlot)
library(tidyverse)
library(igraph)
library(remotes)
library(fgsea)
library(data.table)
library(ggplot2)
library(viridis)
library(ggpubr)
library(googledrive)
allowWGCNAThreads()
## Allowing multi-threading with up to 10 threads.

Options and define variables

myCor <- "cor"
power <- 11.5
myMergeCutHeight <- 0.05 
myDeepSplit <- 2 
minModSize <- 20 
enforceMMS <- FALSE
cor.pval <- 0.05

Load ExpressionSet

#local file: "PfSPZ_cpm_ExpressionSet_230x23768_allGroups_bothBatches_delta_rmBatchFX_06082021_TMT.rds"

temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("17xx0YaggLiyWdJc9rqA1nkPSE7dj05p0"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
dim(x)
## Features  Samples 
##    23768      230

Make weighted gene correlation matrix based on full data set

WGCNA_matrix <- t(exprs(x))
blockSize(ncol(WGCNA_matrix), rectangularBlocks = TRUE, maxMemoryAllocation = 4^31)
## [1] 23768
par(mfrow=c(1,1))
plotClusterTreeSamples(datExpr=WGCNA_matrix)

## NULL
powers <- seq(4,20,by=0.5)
sft <- pickSoftThreshold(WGCNA_matrix, powerVector = powers, corFnc = myCor, verbose = 5, networkType ="signed", blockSize = ncol(WGCNA_matrix))
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 23768 of 23768
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1    4.0    0.052 -2.48          0.931 1840.00   1820.00   2550
## 2    4.5    0.099 -2.69          0.920 1380.00   1360.00   2040
## 3    5.0    0.176 -2.84          0.925 1040.00   1020.00   1650
## 4    5.5    0.264 -2.90          0.934  789.00    771.00   1360
## 5    6.0    0.376 -3.07          0.949  604.00    585.00   1140
## 6    6.5    0.480 -3.15          0.961  466.00    446.00    959
## 7    7.0    0.565 -3.16          0.970  362.00    342.00    816
## 8    7.5    0.622 -3.11          0.970  283.00    264.00    699
## 9    8.0    0.658 -3.07          0.963  223.00    204.00    603
## 10   8.5    0.676 -3.03          0.950  177.00    159.00    523
## 11   9.0    0.702 -2.90          0.946  141.00    124.00    456
## 12   9.5    0.716 -2.79          0.936  114.00     96.90    399
## 13  10.0    0.743 -2.64          0.935   92.10     76.20    351
## 14  10.5    0.776 -2.47          0.941   75.20     60.20    310
## 15  11.0    0.807 -2.32          0.947   61.70     47.70    275
## 16  11.5    0.862 -2.15          0.972   51.00     38.00    244
## 17  12.0    0.866 -2.16          0.970   42.50     30.30    227
## 18  12.5    0.878 -2.17          0.975   35.60     24.30    212
## 19  13.0    0.880 -2.18          0.975   30.00     19.60    200
## 20  13.5    0.888 -2.17          0.978   25.40     15.80    189
## 21  14.0    0.904 -2.14          0.986   21.70     12.80    178
## 22  14.5    0.912 -2.12          0.988   18.60     10.30    170
## 23  15.0    0.916 -2.11          0.988   16.00      8.43    162
## 24  15.5    0.923 -2.08          0.989   13.80      6.89    155
## 25  16.0    0.930 -2.05          0.991   12.00      5.63    149
## 26  16.5    0.934 -2.03          0.990   10.50      4.62    143
## 27  17.0    0.936 -2.01          0.987    9.25      3.81    138
## 28  17.5    0.940 -1.99          0.987    8.16      3.14    132
## 29  18.0    0.945 -1.96          0.988    7.22      2.59    128
## 30  18.5    0.949 -1.94          0.987    6.42      2.14    123
## 31  19.0    0.950 -1.92          0.986    5.73      1.77    119
## 32  19.5    0.955 -1.89          0.986    5.13      1.47    115
## 33  20.0    0.958 -1.87          0.986    4.61      1.23    111
sft$powerEstimate
## [1] 11.5
par(mfrow=c(1,1))
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R²',
     type='n', main = paste('Scale independence'));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=1,col='red'); abline(h=0.90,col='red')

Run an automated network analysis

net <- blockwiseModules(WGCNA_matrix,
                        power=power,
                        deepSplit=myDeepSplit,
                        minModuleSize=minModSize,
                        TOMType="none", 
                        mergeCutHeight=myMergeCutHeight,
                        TOMDenom="mean",
                        detectCutHeight=0.995,
                        corType="pearson",
                        networkType="signed",
                        pamStage=TRUE,
                        pamRespectsDendro=TRUE,
                        reassignThresh=0.05,
                        verbose=5,
                        saveTOMs=FALSE,
                        maxBlockSize=ncol(WGCNA_matrix),
                        nThreads = 0)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 10 parallel threads.
##      Fraction of slow calculations: 0.000000
## 
##  ....clustering..
##  ....detecting modules..
##      ..done.
##  ....calculating module eigengenes..
##      moduleEigengenes : Working on ME for module 1
##      moduleEigengenes : Working on ME for module 2
##      moduleEigengenes : Working on ME for module 3
##      moduleEigengenes : Working on ME for module 4
##      moduleEigengenes : Working on ME for module 5
##      moduleEigengenes : Working on ME for module 6
##      moduleEigengenes : Working on ME for module 7
##      moduleEigengenes : Working on ME for module 8
##      moduleEigengenes : Working on ME for module 9
##      moduleEigengenes : Working on ME for module 10
##      moduleEigengenes : Working on ME for module 11
##      moduleEigengenes : Working on ME for module 12
##      moduleEigengenes : Working on ME for module 13
##      moduleEigengenes : Working on ME for module 14
##      moduleEigengenes : Working on ME for module 15
##      moduleEigengenes : Working on ME for module 16
##      moduleEigengenes : Working on ME for module 17
##      moduleEigengenes : Working on ME for module 18
##      moduleEigengenes : Working on ME for module 19
##      moduleEigengenes : Working on ME for module 20
##      moduleEigengenes : Working on ME for module 21
##      moduleEigengenes : Working on ME for module 22
##      moduleEigengenes : Working on ME for module 23
##      moduleEigengenes : Working on ME for module 24
##      moduleEigengenes : Working on ME for module 25
##      moduleEigengenes : Working on ME for module 26
##      moduleEigengenes : Working on ME for module 27
##      moduleEigengenes : Working on ME for module 28
##      moduleEigengenes : Working on ME for module 29
##      moduleEigengenes : Working on ME for module 30
##      moduleEigengenes : Working on ME for module 31
##      moduleEigengenes : Working on ME for module 32
##      moduleEigengenes : Working on ME for module 33
##      moduleEigengenes : Working on ME for module 34
##      moduleEigengenes : Working on ME for module 35
##      moduleEigengenes : Working on ME for module 36
##      moduleEigengenes : Working on ME for module 37
##      moduleEigengenes : Working on ME for module 38
##      moduleEigengenes : Working on ME for module 39
##      moduleEigengenes : Working on ME for module 40
##      moduleEigengenes : Working on ME for module 41
##      moduleEigengenes : Working on ME for module 42
##      moduleEigengenes : Working on ME for module 43
##      moduleEigengenes : Working on ME for module 44
##      moduleEigengenes : Working on ME for module 45
##      moduleEigengenes : Working on ME for module 46
##      moduleEigengenes : Working on ME for module 47
##      moduleEigengenes : Working on ME for module 48
##      moduleEigengenes : Working on ME for module 49
##      moduleEigengenes : Working on ME for module 50
##      moduleEigengenes : Working on ME for module 51
##      moduleEigengenes : Working on ME for module 52
##      moduleEigengenes : Working on ME for module 53
##      moduleEigengenes : Working on ME for module 54
##      moduleEigengenes : Working on ME for module 55
##      moduleEigengenes : Working on ME for module 56
##      moduleEigengenes : Working on ME for module 57
##      moduleEigengenes : Working on ME for module 58
##      moduleEigengenes : Working on ME for module 59
##      moduleEigengenes : Working on ME for module 60
##      moduleEigengenes : Working on ME for module 61
##      moduleEigengenes : Working on ME for module 62
##      moduleEigengenes : Working on ME for module 63
##      moduleEigengenes : Working on ME for module 64
##      moduleEigengenes : Working on ME for module 65
##      moduleEigengenes : Working on ME for module 66
##      moduleEigengenes : Working on ME for module 67
##      moduleEigengenes : Working on ME for module 68
##      moduleEigengenes : Working on ME for module 69
##      moduleEigengenes : Working on ME for module 70
##      moduleEigengenes : Working on ME for module 71
##      moduleEigengenes : Working on ME for module 72
##      moduleEigengenes : Working on ME for module 73
##      moduleEigengenes : Working on ME for module 74
##      moduleEigengenes : Working on ME for module 75
##      moduleEigengenes : Working on ME for module 76
##      moduleEigengenes : Working on ME for module 77
##      moduleEigengenes : Working on ME for module 78
##      moduleEigengenes : Working on ME for module 79
##      moduleEigengenes : Working on ME for module 80
##      moduleEigengenes : Working on ME for module 81
##      moduleEigengenes : Working on ME for module 82
##      moduleEigengenes : Working on ME for module 83
##      moduleEigengenes : Working on ME for module 84
##      moduleEigengenes : Working on ME for module 85
##      moduleEigengenes : Working on ME for module 86
##      moduleEigengenes : Working on ME for module 87
##      moduleEigengenes : Working on ME for module 88
##      moduleEigengenes : Working on ME for module 89
##      moduleEigengenes : Working on ME for module 90
##      moduleEigengenes : Working on ME for module 91
##      moduleEigengenes : Working on ME for module 92
##      moduleEigengenes : Working on ME for module 93
##      moduleEigengenes : Working on ME for module 94
##      moduleEigengenes : Working on ME for module 95
##      moduleEigengenes : Working on ME for module 96
##      moduleEigengenes : Working on ME for module 97
##      moduleEigengenes : Working on ME for module 98
##      moduleEigengenes : Working on ME for module 99
##      moduleEigengenes : Working on ME for module 100
##      moduleEigengenes : Working on ME for module 101
##      moduleEigengenes : Working on ME for module 102
##      moduleEigengenes : Working on ME for module 103
##      moduleEigengenes : Working on ME for module 104
##      moduleEigengenes : Working on ME for module 105
##      moduleEigengenes : Working on ME for module 106
##      moduleEigengenes : Working on ME for module 107
##      moduleEigengenes : Working on ME for module 108
##      moduleEigengenes : Working on ME for module 109
##      moduleEigengenes : Working on ME for module 110
##      moduleEigengenes : Working on ME for module 111
##      moduleEigengenes : Working on ME for module 112
##      moduleEigengenes : Working on ME for module 113
##      moduleEigengenes : Working on ME for module 114
##      moduleEigengenes : Working on ME for module 115
##      moduleEigengenes : Working on ME for module 116
##      moduleEigengenes : Working on ME for module 117
##      moduleEigengenes : Working on ME for module 118
##      moduleEigengenes : Working on ME for module 119
##      moduleEigengenes : Working on ME for module 120
##      moduleEigengenes : Working on ME for module 121
##      moduleEigengenes : Working on ME for module 122
##  ....checking kME in modules..
##      ..removing 32 genes from module 2 because their KME is too low.
##      ..removing 18 genes from module 3 because their KME is too low.
##      ..removing 1 genes from module 4 because their KME is too low.
##      ..removing 1 genes from module 8 because their KME is too low.
##      ..removing 2 genes from module 18 because their KME is too low.
##      ..removing 1 genes from module 20 because their KME is too low.
##      ..removing 1 genes from module 21 because their KME is too low.
##      ..removing 3 genes from module 23 because their KME is too low.
##      ..removing 2 genes from module 33 because their KME is too low.
##      ..removing 2 genes from module 49 because their KME is too low.
##      ..removing 1 genes from module 50 because their KME is too low.
##   ..reassigning 15 genes from module 1 to modules with higher KME.
##   ..reassigning 216 genes from module 2 to modules with higher KME.
##   ..reassigning 131 genes from module 3 to modules with higher KME.
##   ..reassigning 62 genes from module 4 to modules with higher KME.
##   ..reassigning 105 genes from module 5 to modules with higher KME.
##   ..reassigning 86 genes from module 6 to modules with higher KME.
##   ..reassigning 47 genes from module 7 to modules with higher KME.
##   ..reassigning 47 genes from module 8 to modules with higher KME.
##   ..reassigning 55 genes from module 9 to modules with higher KME.
##   ..reassigning 37 genes from module 10 to modules with higher KME.
##   ..reassigning 14 genes from module 11 to modules with higher KME.
##   ..reassigning 54 genes from module 12 to modules with higher KME.
##   ..reassigning 28 genes from module 13 to modules with higher KME.
##   ..reassigning 67 genes from module 14 to modules with higher KME.
##   ..reassigning 36 genes from module 15 to modules with higher KME.
##   ..reassigning 43 genes from module 16 to modules with higher KME.
##   ..reassigning 35 genes from module 17 to modules with higher KME.
##   ..reassigning 29 genes from module 18 to modules with higher KME.
##   ..reassigning 35 genes from module 19 to modules with higher KME.
##   ..reassigning 48 genes from module 20 to modules with higher KME.
##   ..reassigning 30 genes from module 21 to modules with higher KME.
##   ..reassigning 21 genes from module 22 to modules with higher KME.
##   ..reassigning 41 genes from module 23 to modules with higher KME.
##   ..reassigning 20 genes from module 24 to modules with higher KME.
##   ..reassigning 47 genes from module 25 to modules with higher KME.
##   ..reassigning 21 genes from module 26 to modules with higher KME.
##   ..reassigning 3 genes from module 27 to modules with higher KME.
##   ..reassigning 51 genes from module 28 to modules with higher KME.
##   ..reassigning 22 genes from module 29 to modules with higher KME.
##   ..reassigning 18 genes from module 30 to modules with higher KME.
##   ..reassigning 29 genes from module 31 to modules with higher KME.
##   ..reassigning 35 genes from module 32 to modules with higher KME.
##   ..reassigning 37 genes from module 33 to modules with higher KME.
##   ..reassigning 30 genes from module 34 to modules with higher KME.
##   ..reassigning 35 genes from module 35 to modules with higher KME.
##   ..reassigning 23 genes from module 36 to modules with higher KME.
##   ..reassigning 39 genes from module 37 to modules with higher KME.
##   ..reassigning 40 genes from module 38 to modules with higher KME.
##   ..reassigning 16 genes from module 39 to modules with higher KME.
##   ..reassigning 39 genes from module 40 to modules with higher KME.
##   ..reassigning 22 genes from module 41 to modules with higher KME.
##   ..reassigning 23 genes from module 42 to modules with higher KME.
##   ..reassigning 13 genes from module 43 to modules with higher KME.
##   ..reassigning 17 genes from module 44 to modules with higher KME.
##   ..reassigning 28 genes from module 45 to modules with higher KME.
##   ..reassigning 13 genes from module 46 to modules with higher KME.
##   ..reassigning 10 genes from module 47 to modules with higher KME.
##   ..reassigning 7 genes from module 48 to modules with higher KME.
##   ..reassigning 8 genes from module 49 to modules with higher KME.
##   ..reassigning 37 genes from module 50 to modules with higher KME.
##   ..reassigning 10 genes from module 51 to modules with higher KME.
##   ..reassigning 24 genes from module 52 to modules with higher KME.
##   ..reassigning 23 genes from module 53 to modules with higher KME.
##   ..reassigning 28 genes from module 54 to modules with higher KME.
##   ..reassigning 28 genes from module 55 to modules with higher KME.
##   ..reassigning 19 genes from module 56 to modules with higher KME.
##   ..reassigning 17 genes from module 57 to modules with higher KME.
##   ..reassigning 12 genes from module 58 to modules with higher KME.
##   ..reassigning 15 genes from module 59 to modules with higher KME.
##   ..reassigning 22 genes from module 60 to modules with higher KME.
##   ..reassigning 13 genes from module 61 to modules with higher KME.
##   ..reassigning 20 genes from module 62 to modules with higher KME.
##   ..reassigning 16 genes from module 63 to modules with higher KME.
##   ..reassigning 4 genes from module 64 to modules with higher KME.
##   ..reassigning 17 genes from module 65 to modules with higher KME.
##   ..reassigning 7 genes from module 66 to modules with higher KME.
##   ..reassigning 14 genes from module 67 to modules with higher KME.
##   ..reassigning 22 genes from module 68 to modules with higher KME.
##   ..reassigning 16 genes from module 69 to modules with higher KME.
##   ..reassigning 18 genes from module 70 to modules with higher KME.
##   ..reassigning 21 genes from module 71 to modules with higher KME.
##   ..reassigning 12 genes from module 72 to modules with higher KME.
##   ..reassigning 12 genes from module 73 to modules with higher KME.
##   ..reassigning 15 genes from module 74 to modules with higher KME.
##   ..reassigning 17 genes from module 75 to modules with higher KME.
##   ..reassigning 6 genes from module 76 to modules with higher KME.
##   ..reassigning 6 genes from module 77 to modules with higher KME.
##   ..reassigning 13 genes from module 78 to modules with higher KME.
##   ..reassigning 10 genes from module 79 to modules with higher KME.
##   ..reassigning 6 genes from module 80 to modules with higher KME.
##   ..reassigning 16 genes from module 81 to modules with higher KME.
##   ..reassigning 6 genes from module 82 to modules with higher KME.
##   ..reassigning 13 genes from module 83 to modules with higher KME.
##   ..reassigning 8 genes from module 84 to modules with higher KME.
##   ..reassigning 3 genes from module 85 to modules with higher KME.
##   ..reassigning 4 genes from module 86 to modules with higher KME.
##   ..reassigning 3 genes from module 87 to modules with higher KME.
##   ..reassigning 6 genes from module 88 to modules with higher KME.
##   ..reassigning 8 genes from module 89 to modules with higher KME.
##   ..reassigning 10 genes from module 90 to modules with higher KME.
##   ..reassigning 7 genes from module 91 to modules with higher KME.
##   ..reassigning 11 genes from module 92 to modules with higher KME.
##   ..reassigning 14 genes from module 93 to modules with higher KME.
##   ..reassigning 9 genes from module 94 to modules with higher KME.
##   ..reassigning 6 genes from module 95 to modules with higher KME.
##   ..reassigning 2 genes from module 96 to modules with higher KME.
##   ..reassigning 6 genes from module 97 to modules with higher KME.
##   ..reassigning 11 genes from module 98 to modules with higher KME.
##   ..reassigning 4 genes from module 99 to modules with higher KME.
##   ..reassigning 6 genes from module 100 to modules with higher KME.
##   ..reassigning 8 genes from module 101 to modules with higher KME.
##   ..reassigning 1 genes from module 102 to modules with higher KME.
##   ..reassigning 5 genes from module 103 to modules with higher KME.
##   ..reassigning 7 genes from module 104 to modules with higher KME.
##   ..reassigning 1 genes from module 105 to modules with higher KME.
##   ..reassigning 2 genes from module 106 to modules with higher KME.
##   ..reassigning 9 genes from module 107 to modules with higher KME.
##   ..reassigning 5 genes from module 108 to modules with higher KME.
##   ..reassigning 4 genes from module 109 to modules with higher KME.
##   ..reassigning 6 genes from module 110 to modules with higher KME.
##   ..reassigning 3 genes from module 111 to modules with higher KME.
##   ..reassigning 5 genes from module 112 to modules with higher KME.
##   ..reassigning 1 genes from module 113 to modules with higher KME.
##   ..reassigning 2 genes from module 114 to modules with higher KME.
##   ..reassigning 4 genes from module 115 to modules with higher KME.
##   ..reassigning 3 genes from module 116 to modules with higher KME.
##   ..reassigning 6 genes from module 117 to modules with higher KME.
##   ..reassigning 1 genes from module 118 to modules with higher KME.
##   ..reassigning 2 genes from module 119 to modules with higher KME.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.05
##        multiSetMEs: Calculating module MEs.
##          Working on set 1 ...
##          moduleEigengenes: Calculating 122 module eigengenes in given set.
##        Calculating new MEs...
##        multiSetMEs: Calculating module MEs.
##          Working on set 1 ...
##          moduleEigengenes: Calculating 122 module eigengenes in given set.
# Summary Module Table
nModules <- length(table(net$colors))-1
modules <- cbind(colnames(as.matrix(table(net$colors))),table(net$colors))
orderedModules <- cbind(Mnum=paste("M",seq(1:nModules),sep=""),Color=labels2colors(c(1:nModules)))
modules <- modules[match(as.character(orderedModules[,2]),rownames(modules)),]
tmpMEs <- MEs <- net$MEs
colnames(tmpMEs) <- paste("ME",colnames(MEs),sep="")
kMEdat <- signedKME(WGCNA_matrix, tmpMEs, corFnc=myCor) #calculate (signed) eigengene-based connectivity, also known as module membership
WGCNA_dat <- cbind(fData(x)$GeneSymbol, colnames(WGCNA_matrix),net$colors,kMEdat) %>%
  as.data.frame() %>%
  dplyr::rename(GeneSymbol = "fData(x)$GeneSymbol") %>%
  dplyr::rename(ENSEMBLID = "colnames(WGCNA_matrix)") %>%
  dplyr::rename(ModuleColors = "net$colors")

Correlate modules with traits

# Define numbers of genes and samples
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)

datvar <- pData(x) %>%
  dplyr::select(PATID, SEQBATCH, site, SEX, age.vax1, mal.vax.1, treat, mal.atp.3, tte.mal.atp.6, mal.dvax, mal.dvax.tot, pfcsp_pre, pfcsp_post, log2FC_CSPAb) %>%
  mutate('Gender (female)' = factor(ifelse(SEX == "F", 1, 0))) %>%
  mutate('Site (Siaya)' = factor(ifelse(site == "Siaya", 1, 0))) %>%
  dplyr::rename('Pf infection at first vax' = "mal.vax.1") %>%
  mutate(pfcsp_pre = ifelse(is.na(pfcsp_pre), median(pfcsp_pre, na.rm=TRUE), pfcsp_pre)) %>%
  mutate(pfcsp_post = ifelse(is.na(pfcsp_post), median(pfcsp_post, na.rm=TRUE), pfcsp_post)) %>%
  mutate(mal.dvax.tot = ifelse(is.na(mal.dvax.tot), median(mal.dvax.tot, na.rm=TRUE), mal.dvax.tot)) %>%
  mutate('pre-vax anti-CSP IgG' = log10(pfcsp_pre+1)) %>%
  mutate('post-vax anti-CSP IgG' = log10(pfcsp_post+1)) %>%
  mutate('log2FC anti-CSP IgG' = log2((pfcsp_post+1)/(pfcsp_pre+1))) %>%
  mutate('1.8 x 10^6 PfSPZ' = factor(ifelse(treat == "1.8 x 10^6 PfSPZ", 1, 0))) %>%
  mutate('parasitemic events during vax period' = mal.dvax.tot) %>%
  mutate('uninfected, 3 months' = factor(ifelse(mal.atp.3 == 0, 1, 0))) %>%
  dplyr::rename('days to first parasitemia' = "tte.mal.atp.6") %>%
  mutate(Age = age.vax1) %>%
  dplyr::select(PATID, Age, site, SEX, treat, 'pre-vax anti-CSP IgG', 'parasitemic events during vax period', 'uninfected, 3 months', 'days to first parasitemia', 'log2FC anti-CSP IgG') %>% #delta
  as_tibble() %>%
  column_to_rownames(var = "PATID")

modTraitCor <- cor(orderMEs(net$MEs), datvar, use = "p")
modTraitP <- corPvalueStudent(modTraitCor, nSamples)
#Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value: Will display correlations and their p-values

#Select out only modules that have P<0.05 in Protection
modTraitP.temp <- modTraitP %>%
  as.data.frame() %>%
  rownames_to_column(var = "Module") %>%
  filter(.$'uninfected, 3 months' < cor.pval | .$'days to first parasitemia' < cor.pval)
modTraitCor.select <- modTraitCor[modTraitP.temp$Module,]
modTraitP.select <- modTraitP[modTraitP.temp$Module,]
textMatrix <- paste(signif(modTraitCor.select, 2), "\n(P=",
                   signif(modTraitP.select, 1), ")", sep = "")
dim(textMatrix) <- dim(modTraitCor.select)

topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")

Display the correlation values within a heatmap plot (original Figure 4B of JCI revision)

par(mar = c(11, 9, 1, 1))
labeledHeatmap(Matrix = modTraitCor.select, xLabels = names(datvar),
               yLabels = rownames(modTraitCor.select), ySymbols = rownames(modTraitCor.select),
               colorLabels =FALSE,colors=blueWhiteRed(100),textMatrix=textMatrix,
               setStdMargins = FALSE, zlim = c(-1,1),
               main = paste("Module-trait relationships"),xLabelsAngle = 45) 

Identify hub genes

myColors <- gsub("ME", "", rownames(modTraitCor.select))
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
mytopmodules <- topmodules[myColors] %>%
  as.data.frame() %>%
  dplyr::rename(EnsemblID = ".") %>%
  rownames_to_column("module_label") %>%
  as_tibble() %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
                      colors2plot = myColors,
                      kME.threshold = 0.5, adjacency.threshold = 0.1,
                      adj.power = power, verbose = T,
                      node.size = 1, frame.color = NA, node.color = scales::muted("red"),
                      edge.alpha = .7, edge.width = 0.5)
## using KME values to find genes with high module membership
## subsetting to modules: lightgreen, paleturquoise, lightblue4, coral4, salmon2, brown4, midnightblue 
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 371 nodes and 10861 edges
hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
  options = arpack_defaults)

Top modules and hub genes (rownames for Figure 4E of pre-print)

module_label EnsemblID GeneSymbol
lightgreen ENSG00000012124 CD22
paleturquoise ENSG00000186265 BTLA
lightblue4 ENSG00000255641 NKG2-E
coral4 ENSG00000012817 KDM5D
salmon2 ENSG00000153827 TRIP12
brown4 ENSG00000008952 SEC62
midnightblue ENSG00000101782 RIOK3

Plot network graph of significant modules (Figure 4C of JCI revision)

Network graphs of significant modules containing nodes (red dots) and edges (lines) meeting minimum thresholds. Correlations between nodes in different modules are shown as black edges.

Make X-Y scatterplots between MEs and variables

This was requested by a reviewer.

First find significant Spearman correlations (p<0.05)

all_modules <- topmodules %>%
  as.data.frame() %>%
  dplyr::rename(EnsemblID = ".") %>%
  rownames_to_column("module_color") %>%
  as_tibble() %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")

all_MEs <- MEs %>%  #MEs[,names(lapply(WGCNA_dat_select, nrow))] %>%
  rownames_to_column(var = "subject_id") %>%
  left_join(., datvar %>%
              rownames_to_column(var = "subject_id"),
            by = "subject_id") %>%
  pivot_longer(., cols = c("pre-vax anti-CSP IgG", "parasitemic events during vax period", "days to first parasitemia",  "log2FC anti-CSP IgG"), names_to = "trait", values_to = "trait_value") %>%
  pivot_longer(., cols = contains("ME"), names_to = "module_label", values_to = "module_eigenvalue") %>%
  mutate(module_color = gsub("ME", "", module_label)) %>%
  left_join(., all_modules,
            by = "module_color")

cor_dat_all <- all_MEs %>%
  group_by(GeneSymbol, trait) %>%
  summarize(rho = cor.test(module_eigenvalue, trait_value, method = "spearman")$estimate,
            p = cor.test(module_eigenvalue, trait_value, method = "spearman")$p.value)
## Warning: There were 976 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `rho = cor.test(module_eigenvalue, trait_value, method =
##   "spearman")$estimate`.
## ℹ In group 1: `GeneSymbol = "ABCD2"`, `trait = "days to first parasitemia"`.
## Caused by warning in `cor.test.default()`:
## ! Cannot compute exact p-value with ties
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 975 remaining warnings.
## `summarise()` has grouped output by 'GeneSymbol'. You can override using the
## `.groups` argument.
cor_dat_signif <- cor_dat_all %>%
  filter(trait == "days to first parasitemia" & p<0.05)

spearman_signif_hub_genes <- cor_dat_signif$GeneSymbol

Limma method

Prepare data for differential module eigengene analysis

diffME_dat <- orderMEs(net$MEs) %>% 
  rownames_to_column(var = "subject") %>%
  mutate(subject = gsub("\\_0", "", subject)) %>%
  right_join(., datvar %>%
               dplyr::select(-c(Age, "days to first parasitemia")) %>%
               mutate("parasitemic during vax period" = factor(ifelse(.$"parasitemic events during vax period" > 0, "parasitemic", "not_parasitemic"),
                                                      levels = c("not_parasitemic","parasitemic"))) %>%
               mutate("uninfected, 3 months" = factor(.$"uninfected, 3 months", levels = c(0,1),
                                                      labels = c("infected","never infected"))) %>%
               mutate("pre-vax anti-CSP IgG" = factor(ifelse(.$"pre-vax anti-CSP IgG" > 0, "present", "absent"),
                                                      levels = c("absent","present"))) %>%
               mutate("log2FC anti-CSP IgG" = factor(ifelse(.$"log2FC anti-CSP IgG" > 12.5, "high responder", "low responder"),
                                                      levels = c("low responder","high responder"))) %>%
               dplyr::rename("fold-change anti-CSP IgG" = "log2FC anti-CSP IgG") %>%
               rownames_to_column(var = "subject"),
             by = "subject") %>%
  dplyr::select(-c("parasitemic events during vax period")) %>%
  pivot_longer(cols = contains("ME"), names_to = "module", values_to = "MEs") %>%
  pivot_longer(cols = c("pre-vax anti-CSP IgG":"parasitemic during vax period"), names_to = "trait", values_to = "trait_cat") %>%
  arrange(module, site, treat, trait, trait_cat) %>%
  drop_na(trait_cat)

diffME_dat_samplesize <- diffME_dat %>%
  group_by(module, treat, trait, trait_cat) %>%
  summarize(n=n()) %>% 
  ungroup()
## `summarise()` has grouped output by 'module', 'treat', 'trait'. You can
## override using the `.groups` argument.
pheno_dat <- diffME_dat %>%
  dplyr::select(subject, site, treat, trait, trait_cat) %>%
  distinct(subject, site, treat, trait, trait_cat) %>%
  pivot_wider(names_from = trait, values_from = trait_cat) %>%
  arrange(subject) %>%
  column_to_rownames(var = "subject")  %>%
  droplevels()

feature_dat <- diffME_dat %>%
  dplyr::select(module, MEs) %>%
  distinct(module) %>%
  mutate(module_color = gsub("ME","", module)) %>%
  left_join(., topmodules %>%
  enframe() %>%
  dplyr::rename(EnsemblID = "value",
                module_color = "name") %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")) %>%
  column_to_rownames(var = "module") %>%
  dplyr::rename(hub_gene = "GeneSymbol")
## Joining with `by = join_by(module_color)`
exprs_dat <- orderMEs(net$MEs) %>% 
  rownames_to_column(var = "subject") %>%
  mutate(subject = gsub("\\_0", "", subject)) %>%
  arrange(subject) %>%
  column_to_rownames(var = "subject") %>%
  dplyr::select(rownames(feature_dat)) %>%
  t() 
  
#check
ifelse(all(colnames(exprs_dat) == rownames(pheno_dat)) &
         all(rownames(exprs_dat) == rownames(feature_dat)),
       "Good to go!",
       "Check for matching names")
## [1] "Good to go!"
#make expression set
wgcna_eset <- ExpressionSet(exprs_dat, phenoData = AnnotatedDataFrame(pheno_dat), featureData = AnnotatedDataFrame(feature_dat))
#Determine differences between module eigenvalues as binary using limma

#Pf_baseline
pf_during_vax <- factor(wgcna_eset$`parasitemic during vax period`) #encodes a vector as a factor
treatment <- wgcna_eset$treat
site <- factor(wgcna_eset$site)
design <- model.matrix(~0+pf_during_vax+site+treatment) 
colnames(design)
## [1] "pf_during_vaxnot_parasitemic" "pf_during_vaxparasitemic"    
## [3] "siteWagai"                    "treatment4.5 x 10^5 PfSPZ"   
## [5] "treatment9.0 x 10^5 PfSPZ"    "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("uninfected_during_vax", "infected_during_vax","wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(infected_during_vax - uninfected_during_vax,
                           levels=design) 
contrasts
##                        Contrasts
## Levels                  infected_during_vax - uninfected_during_vax
##   uninfected_during_vax                                          -1
##   infected_during_vax                                             1
##   wagai                                                           0
##   lowdose                                                         0
##   mediumdose                                                      0
##   highdose                                                        0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)

Pf_during_vax <- topTable(fit2, coef="infected_during_vax - uninfected_during_vax", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "infected_vs_uninfected_during_vax") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(Pf_during_vax)
##                          comparison          module  module_color hub_gene
## 1 infected_vs_uninfected_during_vax  MEmidnightblue  midnightblue    RIOK3
## 2 infected_vs_uninfected_during_vax        MEbrown4        brown4    SEC62
## 3 infected_vs_uninfected_during_vax MEdarkseagreen3 darkseagreen3    GHITM
## 4 infected_vs_uninfected_during_vax        MEyellow        yellow    RNF10
## 5 infected_vs_uninfected_during_vax       MEsalmon2       salmon2   TRIP12
## 6 infected_vs_uninfected_during_vax      MEdarkgrey      darkgrey     FLNC
##         logFC      P.Value  adj.P.Val
## 1 -0.03001621 0.0006197385 0.07388096
## 2 -0.02838852 0.0012111633 0.07388096
## 3 -0.02682087 0.0022262108 0.09053257
## 4 -0.02529874 0.0039862594 0.10416229
## 5 -0.02509221 0.0042689465 0.10416229
## 6  0.02090366 0.0171855955 0.34944044
#prevax_antiCSPIgG
prevax_antiCSPIgG <- factor(wgcna_eset$`pre-vax anti-CSP IgG`)
design <- model.matrix(~0+prevax_antiCSPIgG+site+treatment)
colnames(design)
## [1] "prevax_antiCSPIgGabsent"   "prevax_antiCSPIgGpresent" 
## [3] "siteWagai"                 "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("noCSPAb_first_vax", "CSPAb_first_vax","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(CSPAb_first_vax - noCSPAb_first_vax,
                           levels=design) 
contrasts
##                    Contrasts
## Levels              CSPAb_first_vax - noCSPAb_first_vax
##   noCSPAb_first_vax                                  -1
##   CSPAb_first_vax                                     1
##   wagai                                               0
##   lowdose                                             0
##   mediumdose                                          0
##   highdose                                            0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)

CSPAb_baseline <- topTable(fit2, coef="CSPAb_first_vax - noCSPAb_first_vax", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "present_vs_absent_baseline") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(CSPAb_baseline)
##                   comparison          module  module_color hub_gene       logFC
## 1 present_vs_absent_baseline    MEchocolate4    chocolate4     SWT1 -0.03090976
## 2 present_vs_absent_baseline   MElightyellow   lightyellow   PRRC2B  0.02811778
## 3 present_vs_absent_baseline      MEdeeppink      deeppink  RSL24D1 -0.02779748
## 4 present_vs_absent_baseline MEmediumpurple2 mediumpurple2 ARHGEF18  0.02731979
## 5 present_vs_absent_baseline         MEplum4         plum4    TRAM1 -0.02592426
## 6 present_vs_absent_baseline MEdarkseagreen3 darkseagreen3    GHITM -0.02565887
##        P.Value adj.P.Val
## 1 0.0009655604 0.1074472
## 2 0.0026926506 0.1074472
## 3 0.0030121857 0.1074472
## 4 0.0035228597 0.1074472
## 5 0.0055977165 0.1225999
## 6 0.0061032037 0.1225999
#protection
outcome_3mos <- factor(wgcna_eset$`uninfected, 3 months`)
design <- model.matrix(~0+outcome_3mos+site+treatment)
colnames(design)
## [1] "outcome_3mosinfected"       "outcome_3mosnever infected"
## [3] "siteWagai"                  "treatment4.5 x 10^5 PfSPZ" 
## [5] "treatment9.0 x 10^5 PfSPZ"  "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("not_protected","protected", "wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(protected - not_protected,
                           levels=design) 
contrasts
##                Contrasts
## Levels          protected - not_protected
##   not_protected                        -1
##   protected                             1
##   wagai                                 0
##   lowdose                               0
##   mediumdose                            0
##   highdose                              0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)

p_vs_np_3mos <- topTable(fit2, coef="protected - not_protected", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "protected_vs_not_protected") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(p_vs_np_3mos)
##                   comparison         module module_color hub_gene       logFC
## 1 protected_vs_not_protected       MEbrown4       brown4    SEC62  0.02060079
## 2 protected_vs_not_protected MEmidnightblue midnightblue    RIOK3  0.01897438
## 3 protected_vs_not_protected       MEcoral4       coral4    KDM5D -0.01675378
## 4 protected_vs_not_protected        MEblue2        blue2   ADGRE5 -0.01608296
## 5 protected_vs_not_protected      MEsalmon2      salmon2   TRIP12  0.01595809
## 6 protected_vs_not_protected     MEmagenta3     magenta3    PRR13 -0.01575198
##      P.Value adj.P.Val
## 1 0.02056804 0.8625497
## 2 0.03284857 0.8625497
## 3 0.05980260 0.8625497
## 4 0.07063734 0.8625497
## 5 0.07310500 0.8625497
## 6 0.07689212 0.8625497
#CSP Ab response 
FC_CSPAb <- factor(wgcna_eset$`fold-change anti-CSP IgG`)
design <- model.matrix(~0+FC_CSPAb+site+treatment) 
colnames(design)
## [1] "FC_CSPAblow responder"     "FC_CSPAbhigh responder"   
## [3] "siteWagai"                 "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("low_responder","high_responder","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs_dat, design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(high_responder - low_responder,
                           levels=design) 
contrasts
##                 Contrasts
## Levels           high_responder - low_responder
##   low_responder                              -1
##   high_responder                              1
##   wagai                                       0
##   lowdose                                     0
##   mediumdose                                  0
##   highdose                                    0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
hi_vs_low_CSP <- topTable(fit2, coef="high_responder - low_responder", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "hi_vs_low_CSP") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(hi_vs_low_CSP)
##      comparison        module module_color   hub_gene       logFC     P.Value
## 1 hi_vs_low_CSP  MEindianred4   indianred4      IL1RN -0.03855345 0.006462946
## 2 hi_vs_low_CSP      MEviolet       violet AC003991.3 -0.03741874 0.008128466
## 3 hi_vs_low_CSP     MEskyblue      skyblue      NFAM1 -0.03484879 0.013560050
## 4 hi_vs_low_CSP       MEplum3        plum3     LRRC25 -0.03425417 0.015312087
## 5 hi_vs_low_CSP      MEpurple       purple      RNF24 -0.03413091 0.015647248
## 6 hi_vs_low_CSP MEdarkorange2  darkorange2      PTAFR -0.03353148 0.017530385
##   adj.P.Val
## 1 0.3564512
## 2 0.3564512
## 3 0.3564512
## 4 0.3564512
## 5 0.3564512
## 6 0.3564512
all_module_trait_dat <- rbind(Pf_during_vax, CSPAb_baseline, hi_vs_low_CSP, p_vs_np_3mos) %>%
  arrange(P.Value)

p_vs_np_3mos_select <- p_vs_np_3mos %>%
  filter(P.Value<0.05)

all_module_trait_hm_dat <- all_module_trait_dat %>%
  filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
  dplyr::select(comparison, module_color, hub_gene, logFC) %>%
  pivot_wider(names_from = comparison, values_from = logFC) %>%
  column_to_rownames(var = "hub_gene")

#lapply(WGCNA_dat_select, nrow)
### Heatamp for WGCNA Limma data
#Look only at modules that were significantly different between P vs NP, which is RIOK3 and SEC62

library(tidyverse)
library(ComplexHeatmap)
## Loading required package: grid
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern
## ========================================
## ComplexHeatmap version 2.17.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
p_vs_np_3mos_select <- p_vs_np_3mos %>%
  filter(P.Value < 0.05) %>%
  dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)

Pf_during_vax_select <- Pf_during_vax %>%
  filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
  dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)

CSPAb_baseline_select <- CSPAb_baseline %>%
  filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
  dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)

hi_vs_low_CSP_select <- hi_vs_low_CSP %>%
  filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
  dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)
  

all_module_trait_binary_hm_dat <- bind_rows(p_vs_np_3mos_select,
                                     Pf_during_vax_select,
                                     CSPAb_baseline_select,
                                     hi_vs_low_CSP_select)

all_module_trait_binary_hm_mat <- all_module_trait_binary_hm_dat %>%
  dplyr::select(comparison, hub_gene, logFC) %>%
  pivot_wider(names_from = "comparison", values_from = "logFC", id_cols = hub_gene) %>%
  column_to_rownames(var = "hub_gene") %>%
  as.matrix()


clab <- list(
  "hub_gene" = c("RIOK3" = "midnightblue", "SEC62" = "brown4")
  )

clab <- rev(clab)

row_ha_df <-  all_module_trait_binary_hm_dat %>%
  filter(comparison == "protected_vs_not_protected") %>%
  dplyr::select(hub_gene)
row_ha <- rowAnnotation(df = row_ha_df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
                        simple_anno_size = unit(0.2, "inches"))

paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(all_module_trait_binary_hm_mat)), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(all_module_trait_binary_hm_mat)/paletteLength, max(abs(all_module_trait_binary_hm_mat)), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#548FCB", "white", "#B83434"))(length(myBreaks))

my_hm <- Heatmap(all_module_trait_binary_hm_mat,
        col = circlize::colorRamp2(myBreaks, myColor),
        color_space = "LAB",
        row_names_side = "left",
        row_dend_side = "right",
        left_annotation = row_ha,
        border = TRUE)
draw(my_hm)

my_hub_genes <- c(spearman_signif_hub_genes, unique(all_module_trait_binary_hm_dat$hub_gene))
my_scatter_plots <- all_MEs %>%
  filter(GeneSymbol %in% my_hub_genes) %>%
  mutate(trait = factor(trait, levels = c("pre-vax anti-CSP IgG", "parasitemic events during vax period",  "log2FC anti-CSP IgG", "days to first parasitemia"))) %>%
  mutate(GeneSymbol = factor(GeneSymbol, levels =  my_hub_genes)) %>%
  ggplot(., aes(x = trait_value, y = module_eigenvalue)) +
  geom_point() +
  geom_smooth(method = "lm") +
  xlab("trait value") +
  ylab("module eigenvalue") +
  stat_cor(method = "spearman",
           cor.coef.name = "rho") +
  theme_bw() +
  ggh4x::facet_grid2(GeneSymbol~trait, scales = "free", switch = "y")

Plot individual beeswarmplot plots for protected vs non-protected

beeswarmplot_dat <- orderMEs(net$MEs) %>% 
  rownames_to_column(var = "subject") %>%
  mutate(subject = gsub("\\_0", "", subject)) %>%
  right_join(., pData(wgcna_eset) %>%
               dplyr::select(-c(site, treat)) %>%
               rownames_to_column(var = "subject"),
             by = "subject") %>%
  pivot_longer(cols = contains("ME"), names_to = "module_label", values_to = "module_eigenvalue") %>%
  mutate(module_color = gsub("ME", "", module_label)) %>%
    left_join(., all_modules,
              by = "module_color") %>%
  filter(GeneSymbol %in% my_hub_genes)
library(tidyverse)
library(broom)
beeswarmplot_dat_factor <- beeswarmplot_dat %>%
  mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
  mutate(protected_3mos = ifelse(`uninfected, 3 months` == "infected", "NP", "P")) %>%
  pivot_longer(cols = c("fold-change anti-CSP IgG":"uninfected, 3 months", protected_3mos), names_to = "trait", values_to = "value")  %>%
  mutate(trait = factor(trait, levels = c("parasitemic during vax period",
                                          "pre-vax anti-CSP IgG",
                                          "uninfected, 3 months",
                                          "protected_3mos",
                                          "fold-change anti-CSP IgG"))) %>%
  filter(trait == "protected_3mos")

beeswarmplot_dat_factor_norm_test <- beeswarmplot_dat_factor %>%
  group_by(GeneSymbol, trait)%>% 
  do(tidy(shapiro.test(.$module_eigenvalue))) %>% 
  ungroup() %>%
  dplyr::select(-method) %>%
  mutate(is_normal = ifelse(p.value<0.05, "no","yes"))
  
beeswarmplot_dat_factor_n <- beeswarmplot_dat_factor %>%
  group_by(GeneSymbol, trait) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'GeneSymbol'. You can override using the
## `.groups` argument.
data_summary <- function(x) {
  m <- mean(x, na.rm=TRUE)
  sem <-sd(x, na.rm=TRUE)/sqrt(sum(!is.na(x)))
  ymin<-m-1.96*sem
  ymax<-m+1.96*sem
  return(c(y=m,ymin=ymin,ymax=ymax))
}


all_beeswarm_plot <- beeswarmplot_dat_factor %>%
  ggplot(., aes(x=value, y=module_eigenvalue)) +
  ggbeeswarm::geom_beeswarm(alpha = 0.5, color = "lightblue") +
  stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. - 0.1, yend=..y..), linewidth = 0.5)+
  stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. + 0.1, yend=..y..), linewidth = 0.5) +
  stat_summary(fun.data=data_summary, geom="errorbar", width=0.1) +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  ylab("module eigenvalue") +
  xlab("") +
  theme_bw() +
  ggh4x::facet_grid2(GeneSymbol~trait, scales = "free", switch = "y") #+
  # theme(axis.text = element_text(colour = "black"),
  #       strip.background = element_blank(),
  #       axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
  #       plot.title = element_text(size=8))

Figure S6A of revised manuscript

ggarrange(my_scatter_plots, all_beeswarm_plot, widths = c(4,1.15))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The dot-dot notation (`..x..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Make revised heatmap (Figure 4B, JCI revision)

This will show Spearman’s rho as color scale.

cor_dat <- my_scatter_plots$data %>%
  group_by(GeneSymbol, trait) %>%
  summarize(rho = cor.test(module_eigenvalue, trait_value, method = "spearman")$estimate,
            p = cor.test(module_eigenvalue, trait_value, method = "spearman")$p.value)
## Warning: There were 48 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `rho = cor.test(module_eigenvalue, trait_value, method =
##   "spearman")$estimate`.
## ℹ In group 1: `GeneSymbol = BTLA`, `trait = pre-vax anti-CSP IgG`.
## Caused by warning in `cor.test.default()`:
## ! Cannot compute exact p-value with ties
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 47 remaining warnings.
## `summarise()` has grouped output by 'GeneSymbol'. You can override using the
## `.groups` argument.
hm_cor_mat <- cor_dat %>%
  dplyr::select(-p) %>%
  pivot_wider(names_from = trait, values_from = rho) %>%
  column_to_rownames(var = "GeneSymbol") %>%
  as.matrix()
mytopmodules_list <- all_modules %>%
  filter(GeneSymbol %in% my_hub_genes) %>%
  mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
  dplyr::select(GeneSymbol, module_color) %>%
  deframe()

clab <- list(
  "hub_gene" = mytopmodules_list
  )
clab <- rev(clab)

row_ha_df <-  all_modules %>%
  filter(GeneSymbol %in% my_hub_genes) %>%
  dplyr::select(GeneSymbol) %>%
  mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
  arrange(GeneSymbol) %>%
  dplyr::rename(hub_gene = "GeneSymbol")

hm_cor_mat <- hm_cor_mat[levels(row_ha_df$hub_gene),]

row_ha <- rowAnnotation(df = row_ha_df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
                        simple_anno_size = unit(0.1, "inches"),
                        show_legend = FALSE)

paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(hm_cor_mat)), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(hm_cor_mat)/paletteLength, max(abs(hm_cor_mat)), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#548FCB", "white", "#B83434"))(length(myBreaks))

my_hm <- Heatmap(hm_cor_mat,
        col = circlize::colorRamp2(myBreaks, myColor),
        color_space = "LAB",
        row_names_side = "left",
        row_dend_side = "right",
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        left_annotation = row_ha,
        border = TRUE)

Limma heatmap for P vs NP only

 hm_limma_mat <- p_vs_np_3mos %>%
  filter(hub_gene %in% my_hub_genes) %>%
  mutate(trait = "protection_3mos") %>%
  dplyr::select(hub_gene, trait, logFC) %>%
  pivot_wider(names_from = trait, values_from = logFC) %>%
  column_to_rownames(var = "hub_gene") %>%
  as.matrix(., nrow(.), byrow=TRUE)
  
mytopmodules_list <- all_modules %>%
  filter(GeneSymbol %in% my_hub_genes) %>%
  mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
  dplyr::select(GeneSymbol, module_color) %>%
  deframe()

clab <- list(
  "hub_gene" = mytopmodules_list
  )
clab <- rev(clab)

row_ha_df <-  all_modules %>%
  filter(GeneSymbol %in% my_hub_genes) %>%
  dplyr::select(GeneSymbol) %>%
  mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
  arrange(GeneSymbol) %>%
  dplyr::rename(hub_gene = "GeneSymbol")

hm_limma_mat <- hm_limma_mat[levels(row_ha_df$hub_gene), 1, drop = FALSE]

row_ha <- rowAnnotation(df = row_ha_df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
                        simple_anno_size = unit(0.1, "inches"))

paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(hm_limma_mat)), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(hm_limma_mat)/paletteLength, max(abs(hm_limma_mat)), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#548FCB", "white", "#B83434"))(length(myBreaks))

my_limma_hm <- Heatmap(hm_limma_mat,
        col = circlize::colorRamp2(myBreaks, myColor),
        color_space = "LAB",
        cluster_rows = FALSE,
        row_names_side = "left",
        row_dend_side = "right",
        #left_annotation = row_ha,
        border = TRUE)
my_hm + my_limma_hm

Redo network graphs

foo <- all_modules %>%
  filter(GeneSymbol %in% my_hub_genes)
myColors <- foo$module_color
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
mytopmodules <- topmodules[myColors] %>%
  as.data.frame() %>%
  dplyr::rename(EnsemblID = ".") %>%
  rownames_to_column("module_label") %>%
  as_tibble() %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
                      colors2plot = myColors,
                      kME.threshold = 0.5, adjacency.threshold = 0.1,
                      adj.power = power, verbose = T,
                      node.size = 1, frame.color = NA, node.color = scales::muted("red"),
                      edge.alpha = .7, edge.width = 0.5)
## using KME values to find genes with high module membership
## subsetting to modules: brown4, lightblue4, lightgreen, midnightblue, mistyrose, paleturquoise 
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 316 nodes and 8675 edges
hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
  options = arpack_defaults)

Plot network graph of significant modules (Figure 4C of JCI revision)

Network graphs of significant modules containing nodes (red dots) and edges (lines) meeting minimum thresholds. Correlations between nodes in different modules are shown as black edges.

plot(graph)

List number of genes per module

downselected_MEs <- paste0("ME", myColors)
WGCNA_dat_select <- c()
for(i in downselected_MEs){
  module.color <- sub("ME","", i)
  module.colname <- paste0("kME", i)
  WGCNA_dat_select[[i]] <- WGCNA_dat %>% 
    filter(ModuleColors == module.color) %>%
    dplyr::select(GeneSymbol, all_of(module.colname))
}
lapply(WGCNA_dat_select, nrow)
## $MEbrown4
## [1] 93
## 
## $MElightblue4
## [1] 47
## 
## $MElightgreen
## [1] 136
## 
## $MEmidnightblue
## [1] 156
## 
## $MEmistyrose
## [1] 21
## 
## $MEpaleturquoise
## [1] 110

Over-representation Analysis

#closeAllConnections()
source("https://raw.githubusercontent.com/TranLab/kspzv1-systems/main/helper/ApplyORA2Genesets.R")
myuniverse <- unique(fData(x)[fData(x)$EnsemblID %in% colnames(WGCNA_matrix),]$GeneSymbol)
ORA_baseline_bound_df <- c()
minSize <- 20
downselected_MEs
## [1] "MEbrown4"        "MElightblue4"    "MElightgreen"    "MEmidnightblue" 
## [5] "MEmistyrose"     "MEpaleturquoise"
#Combine similar modules skyblue1, mediumpurple1, thistle3 in to one
WGCNA_dat_select2 <- list()
WGCNA_dat_select2$NKG2E <- WGCNA_dat_select$MElightblue4
WGCNA_dat_select2$LTF <- WGCNA_dat_select$MEmistyrose
WGCNA_dat_select2$BLTA_CD22 <- data.table::rbindlist(list(WGCNA_dat_select$MEpaleturquoise,
                                                WGCNA_dat_select$MElightgreen)) %>%
  as.data.frame() %>%
  dplyr::rename(kMEMEall = "kMEMEpaleturquoise") 
WGCNA_dat_select2$RIOK3_SEC62 <- data.table::rbindlist(list(WGCNA_dat_select$MEmidnightblue,
                                                WGCNA_dat_select$MEbrown4)) %>%
  as.data.frame() %>%
  dplyr::rename(kMEMEall = "kMEMEmidnightblue")
for(k in names(WGCNA_dat_select2)){
  WGCNA_dat_select2[[k]] <- WGCNA_dat_select2[[k]][order(WGCNA_dat_select2[[k]][,2], decreasing = TRUE),]
  ORA_baseline_bound_df[[k]] <- ApplyORA2Genesets(genelist = WGCNA_dat_select2[[k]]$GeneSymbol,
                                                  geneset = "all",
                                                  universe = myuniverse,
                                                  output_directory = tempdir(),
                                                  filename_prefix = paste0("ORA_Mod_Corr_Protect_3_mos_", k,
                                                                           "_minSize", minSize),
                                                  minSize = minSize)
  }
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
NKG2E_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$NKG2E, .id = "module_type") %>%
  mutate(module = "lightblue4") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MElightblue4)) %>%
  mutate(hub_gene = "NKG2E") 
LTF_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$LTF, .id = "module_type") %>%
  mutate(module = "mistyrose") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MEmistyrose)) %>%
  mutate(hub_gene = "LTF")
BLTA_CD22_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$BLTA_CD22, .id = "module_type") %>%
  mutate(module = "paleturquoise and lightgreen") %>%
  mutate(module_size = nrow(WGCNA_dat_select2$BLTA_CD22)) %>%
  mutate(hub_gene = "BLTA_CD22")
RIOK3_SEC62_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$RIOK3_SEC62, .id = "module_type") %>%
  mutate(module = "midnightblue and brown4") %>%
  mutate(module_size = nrow(WGCNA_dat_select2$RIOK3_SEC62)) %>%
  mutate(hub_gene = "RIOK3_SEC62")


ORA_baseline_allbound <- bind_rows(NKG2E_ORA_baseline_bound,
                                  LTF_ORA_baseline_bound,
                                  BLTA_CD22_ORA_baseline_bound,
                                  RIOK3_SEC62_ORA_baseline_bound) %>%
  mutate(neglogpadj = -log10(padj)) %>%
  mutate(pct_overlap = 100*(overlap/module_size)) %>%
  dplyr::select(module, module_size, hub_gene, module_type, pathway, overlap,
                pct_overlap, size, overlapGenes, pval, padj, neglogpadj)

Plot WGCNA over-representation results, JCI revision

delta analysis

myModuleTypes <- c("MSigDB_Hallmark_v7.4", "MSigDB_C2_kegg_v7.4", "highBTMs", "BloodGen3Module")
myORAClusterPlotDat <- ORA_baseline_allbound %>%
  mutate(pathway = gsub("VS", "v", pathway)) %>%
  mutate(pathway = gsub("Vd", "Vδ", pathway)) %>%
  mutate(pathway = gsub("gd", "γδ", pathway)) %>%
  mutate(pathway = sub(".*?\\_", "", pathway)) %>%
  mutate(pathway = fct_reorder(pathway, neglogpadj))  %>%
  arrange(desc(neglogpadj))%>%
  mutate(TextLabelColor = ifelse(module_type == "BloodGen3Module", scales::muted("red"),
                                 ifelse(module_type == "MSigDB_C2_kegg_v7.4", scales::muted("blue"),
                                        ifelse(module_type == "MSigDB_Hallmark_v7.4", "black","gray")))) %>%
  filter(padj < 0.01) %>%
  filter(pct_overlap >= 5) %>%
  filter(module_type %in% myModuleTypes) %>%
  group_by(hub_gene) %>%
  arrange(neglogpadj) %>%
  slice_tail(n = 4) %>%
  ungroup()

Arrange plots

addSmallLegend <- function(myPlot, pointSize = 1.5, textSize = 3, spaceLegend = 0.3) {
    myPlot +
        guides(shape = guide_legend(override.aes = list(size = pointSize)),
               color = guide_legend(override.aes = list(size = pointSize))) +
        theme(legend.title = element_text(size = textSize), 
              legend.text  = element_text(size = textSize),
              legend.key.size = unit(spaceLegend, "lines"))
}

scale_begin <- floor(min(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))
scale_end <- ceiling(max(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))

myArrangedPlot <- myORAClusterPlotDat %>%
  mutate(pathway = fct_reorder(pathway, neglogpadj))  %>%
  ggplot(., aes(x = neglogpadj, y = pathway, fill = pct_overlap)) +
  geom_bar(stat = 'identity') +
  geom_vline(xintercept = 2, color = "red", linetype = "dotted") +
  scale_fill_distiller(direction = 1, breaks = c(0, 10, 20, 30, 40, 50), limits = c(0,50)) +
  ylab("module") +
  theme_classic(base_family = "sans", base_size = 14) +
  theme(legend.position = "bottom",
        plot.margin = unit(c(0,0.5,0,0.5), "cm")) +
  facet_wrap(~hub_gene, scales ="free") +
  theme(strip.background = element_blank())

myfont <- "Helvetica"
bubble_max_size <- 15
basetextsize <- 10
myORABubblePlot <- myORAClusterPlotDat %>%
    mutate(pathway = fct_reorder(pathway, neglogpadj))  %>%
  ggplot(., aes(x = hub_gene, y = pathway)) +
  geom_point(aes(size=pct_overlap, fill = neglogpadj), alpha = 0.65, shape=21, stroke = 0.25) +
  scale_size_area(name = expression(-log[10]~adj.~p~value), max_size = bubble_max_size, breaks = c(1, 5, 10, 20, 40, 60), limits = c(1,75)) +
  viridis::scale_fill_viridis(option= "A", begin = 0.25, end = 0.75, alpha = 0.8, direction = -1, name = "% overlap") +
  hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
  scale_x_discrete(limits=rev) +
  scale_y_discrete(position = "right") +
  ylab("pathway/module") +
  xlab("network hub gene(s)") +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 90, vjust = 1, hjust=0)) +
  coord_flip()

Delta WGCNA Top Modules ORA bubble plots (Figure S6B, JCI revision)